#!/usr/bin/env groovy
package scripts;
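/**
 * This script reads a gene list XML file and, for each gene id listed under
 * each paper, downloads the matching Bio2RDF RDF/XML record into the data
 * directory. (The commented-out section at the bottom parses a set of entrez
 * gene XML files and outputs a csv file.)
 */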

import org.biogroovy.io.*;
import org.biogroovy.models.Gene;
import org.biogroovy.eutils.*;

// Gene list XML: the loop below walks its <paper> elements and the gene-id
// attribute of each <gene> child.
String genelist = "/Users/markfortner/Documents/pancreatic_cancer/GeneLists/pcgene_db2.xml"
// Directory that receives one "<gene-id>.rdf.xml" file per downloaded gene.
String dataDir = "/Users/markfortner/Documents/pancreatic_cancer/data"
def genedata = new XmlSlurper().parse(new File(genelist));

// For every gene id referenced by every paper, fetch the RDF/XML record from
// bio2rdf.org and store it in dataDir. A failure on one gene is reported and
// skipped so the remaining genes are still downloaded (best-effort).
genedata.paper.each { paper ->
	paper.gene.'@gene-id'.each { idAttr ->
		String geneId = idAttr.text().trim()
		if (!geneId) {
			// Blank gene-id attribute: nothing to download.
			return
		}

		String fileName = "${geneId}.rdf.xml"
		try {
			// withInputStream / withOutputStream close both streams even when the
			// copy throws, so no connection or file handle is leaked. The output
			// file is only created once the URL stream has opened successfully.
			new URL("http://www.bio2rdf.org/rdfxml/geneid:${geneId}").withInputStream { is ->
				new File(dataDir, fileName).withOutputStream { os ->
					os << is
				}
			}
			println "Writing file: ${fileName}"
		} catch (Exception ex) {
			// Name the gene that failed so the stack trace can be tied back to it.
			System.err.println "Failed to download gene ${geneId}"
			ex.printStackTrace()
		}
	}
}


//String dirStr = "/Users/markfortner/Documents/pancreatic_cancer/data";
//File dir = new File(dirStr);
//
//EntrezGeneReader reader = new EntrezGeneReader();
//Gene2CSVWriter writer = new Gene2CSVWriter();
//OutputStream os = new FileOutputStream(new File(dirStr +"/data.csv"));
//Gene gene = null;
//dir.eachFileMatch( ~".*.xml",{ 
//	println "processing: ${it}"
//	gene = reader.readFile("${it}");
//	writer.output(gene, os);
//});


